/*-------------------------------------------------------------------------
 *
 * genfile.c
 *        Functions for direct access to files
 *
 *
 * Copyright (c) 2004-2017, PostgreSQL Global Development Group
 *
 * Author: Andreas Pflug <pgadmin@pse-consulting.de>
 *
 * IDENTIFICATION
 *      src/backend/utils/adt/genfile.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <sys/file.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>

#include "access/htup_details.h"
#include "access/xlog_internal.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "postmaster/syslogger.h"
#include "storage/fd.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/timestamp.h"

typedef struct
{
    char       *location;
    DIR           *dirdesc;
    bool        include_dot_dirs;
} directory_fctx;


/*
 * Convert a "text" filename argument to C string, and check it's allowable.
 *
 * Filename may be absolute or relative to the DataDir, but we only allow
 * absolute paths that match DataDir or Log_directory.
 */
static char *
convert_and_check_filename(text *arg)
{
    char       *filename;

    filename = text_to_cstring(arg);
    canonicalize_path(filename);    /* filename can change length here */

    if (is_absolute_path(filename))
    {
        /* Disallow '/a/b/data/..' */
        if (path_contains_parent_reference(filename))
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     (errmsg("reference to parent directory (\"..\") not allowed"))));

        /*
         * Allow absolute paths if within DataDir or Log_directory, even
         * though Log_directory might be outside DataDir.
         */
        if (!path_is_prefix_of_path(DataDir, filename) &&
            (!is_absolute_path(Log_directory) ||
             !path_is_prefix_of_path(Log_directory, filename)))
            ereport(ERROR,
                    (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                     (errmsg("absolute path not allowed"))));
    }
    else if (!path_is_relative_and_below_cwd(filename))
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("path must be in or below the current directory"))));

    return filename;
}


/*
 * Read a section of a file, returning it as bytea
 *
 * Caller is responsible for all permissions checking.
 *
 * We read the whole of the file when bytes_to_read is negative.
 */
static bytea *
read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
                 bool missing_ok)
{// #lizard forgives
    bytea       *buf;
    size_t        nbytes;
    FILE       *file;

    if (bytes_to_read < 0)
    {
        if (seek_offset < 0)
            bytes_to_read = -seek_offset;
        else
        {
            struct stat fst;

            if (stat(filename, &fst) < 0)
            {
                if (missing_ok && errno == ENOENT)
                    return NULL;
                else
                    ereport(ERROR,
                            (errcode_for_file_access(),
                             errmsg("could not stat file \"%s\": %m", filename)));
            }

            bytes_to_read = fst.st_size - seek_offset;
        }
    }

    /* not sure why anyone thought that int64 length was a good idea */
    if (bytes_to_read > (MaxAllocSize - VARHDRSZ))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("requested length too large")));

    if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
    {
        if (missing_ok && errno == ENOENT)
            return NULL;
        else
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not open file \"%s\" for reading: %m",
                            filename)));
    }

    if (fseeko(file, (off_t) seek_offset,
               (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not seek in file \"%s\": %m", filename)));

    buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);

    nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);

    if (ferror(file))
        ereport(ERROR,
                (errcode_for_file_access(),
                 errmsg("could not read file \"%s\": %m", filename)));

    SET_VARSIZE(buf, nbytes + VARHDRSZ);

    FreeFile(file);

    return buf;
}

/*
 * Similar to read_binary_file, but we verify that the contents are valid
 * in the database encoding.
 */
static text *
read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
               bool missing_ok)
{
    bytea       *buf;

    buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);

    if (buf != NULL)
    {
        /* Make sure the input is valid */
        pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);

        /* OK, we can cast it to text safely */
        return (text *) buf;
    }
    else
        return NULL;
}

/*
 * Read a section of a file, returning it as text
 */
Datum
pg_read_file(PG_FUNCTION_ARGS)
{
    text       *filename_t = PG_GETARG_TEXT_PP(0);
    int64        seek_offset = 0;
    int64        bytes_to_read = -1;
    bool        missing_ok = false;
    char       *filename;
    text       *result;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to read files"))));

    /* handle optional arguments */
    if (PG_NARGS() >= 3)
    {
        seek_offset = PG_GETARG_INT64(1);
        bytes_to_read = PG_GETARG_INT64(2);

        if (bytes_to_read < 0)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("requested length cannot be negative")));
    }
    if (PG_NARGS() >= 4)
        missing_ok = PG_GETARG_BOOL(3);

    filename = convert_and_check_filename(filename_t);

    result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
    if (result)
        PG_RETURN_TEXT_P(result);
    else
        PG_RETURN_NULL();
}

/*
 * Read a section of a file, returning it as bytea
 */
Datum
pg_read_binary_file(PG_FUNCTION_ARGS)
{
    text       *filename_t = PG_GETARG_TEXT_PP(0);
    int64        seek_offset = 0;
    int64        bytes_to_read = -1;
    bool        missing_ok = false;
    char       *filename;
    bytea       *result;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to read files"))));

    /* handle optional arguments */
    if (PG_NARGS() >= 3)
    {
        seek_offset = PG_GETARG_INT64(1);
        bytes_to_read = PG_GETARG_INT64(2);

        if (bytes_to_read < 0)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("requested length cannot be negative")));
    }
    if (PG_NARGS() >= 4)
        missing_ok = PG_GETARG_BOOL(3);

    filename = convert_and_check_filename(filename_t);

    result = read_binary_file(filename, seek_offset,
                              bytes_to_read, missing_ok);
    if (result)
        PG_RETURN_BYTEA_P(result);
    else
        PG_RETURN_NULL();
}


/*
 * Wrapper functions for the 1 and 3 argument variants of pg_read_file()
 * and pg_binary_read_file().
 *
 * These are necessary to pass the sanity check in opr_sanity, which checks
 * that all built-in functions that share the implementing C function take
 * the same number of arguments.
 */
Datum
pg_read_file_off_len(PG_FUNCTION_ARGS)
{
    return pg_read_file(fcinfo);
}

Datum
pg_read_file_all(PG_FUNCTION_ARGS)
{
    return pg_read_file(fcinfo);
}

Datum
pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
{
    return pg_read_binary_file(fcinfo);
}

Datum
pg_read_binary_file_all(PG_FUNCTION_ARGS)
{
    return pg_read_binary_file(fcinfo);
}

/*
 * stat a file
 */
Datum
pg_stat_file(PG_FUNCTION_ARGS)
{
    text       *filename_t = PG_GETARG_TEXT_PP(0);
    char       *filename;
    struct stat fst;
    Datum        values[6];
    bool        isnull[6];
    HeapTuple    tuple;
    TupleDesc    tupdesc;
    bool        missing_ok = false;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to get file information"))));

    /* check the optional argument */
    if (PG_NARGS() == 2)
        missing_ok = PG_GETARG_BOOL(1);

    filename = convert_and_check_filename(filename_t);

    if (stat(filename, &fst) < 0)
    {
        if (missing_ok && errno == ENOENT)
            PG_RETURN_NULL();
        else
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not stat file \"%s\": %m", filename)));
    }

    /*
     * This record type had better match the output parameters declared for me
     * in pg_proc.h.
     */
    tupdesc = CreateTemplateTupleDesc(6, false);
    TupleDescInitEntry(tupdesc, (AttrNumber) 1,
                       "size", INT8OID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 2,
                       "access", TIMESTAMPTZOID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 3,
                       "modification", TIMESTAMPTZOID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 4,
                       "change", TIMESTAMPTZOID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 5,
                       "creation", TIMESTAMPTZOID, -1, 0);
    TupleDescInitEntry(tupdesc, (AttrNumber) 6,
                       "isdir", BOOLOID, -1, 0);
    BlessTupleDesc(tupdesc);

    memset(isnull, false, sizeof(isnull));

    values[0] = Int64GetDatum((int64) fst.st_size);
    values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
    values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
    /* Unix has file status change time, while Win32 has creation time */
#if !defined(WIN32) && !defined(__CYGWIN__)
    values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
    isnull[4] = true;
#else
    isnull[3] = true;
    values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
#endif
    values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));

    tuple = heap_form_tuple(tupdesc, values, isnull);

    pfree(filename);

    PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}

/*
 * stat a file (1 argument version)
 *
 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
 * which checks that all built-in functions that share the implementing C
 * function take the same number of arguments
 */
Datum
pg_stat_file_1arg(PG_FUNCTION_ARGS)
{
    return pg_stat_file(fcinfo);
}

/*
 * List a directory (returns the filenames only)
 */
Datum
pg_ls_dir(PG_FUNCTION_ARGS)
{// #lizard forgives
    FuncCallContext *funcctx;
    struct dirent *de;
    directory_fctx *fctx;
    MemoryContext oldcontext;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to get directory listings"))));

    if (SRF_IS_FIRSTCALL())
    {
        bool        missing_ok = false;
        bool        include_dot_dirs = false;

        /* check the optional arguments */
        if (PG_NARGS() == 3)
        {
            if (!PG_ARGISNULL(1))
                missing_ok = PG_GETARG_BOOL(1);
            if (!PG_ARGISNULL(2))
                include_dot_dirs = PG_GETARG_BOOL(2);
        }

        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        fctx = palloc(sizeof(directory_fctx));
        fctx->location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));

        fctx->include_dot_dirs = include_dot_dirs;
        fctx->dirdesc = AllocateDir(fctx->location);

        if (!fctx->dirdesc)
        {
            if (missing_ok && errno == ENOENT)
            {
                MemoryContextSwitchTo(oldcontext);
                SRF_RETURN_DONE(funcctx);
            }
            else
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not open directory \"%s\": %m",
                                fctx->location)));
        }
        funcctx->user_fctx = fctx;
        MemoryContextSwitchTo(oldcontext);
    }

    funcctx = SRF_PERCALL_SETUP();
    fctx = (directory_fctx *) funcctx->user_fctx;

    while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
    {
        if (!fctx->include_dot_dirs &&
            (strcmp(de->d_name, ".") == 0 ||
             strcmp(de->d_name, "..") == 0))
            continue;

        SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(de->d_name));
    }

    FreeDir(fctx->dirdesc);

    SRF_RETURN_DONE(funcctx);
}

/*
 * List a directory (1 argument version)
 *
 * note: this wrapper is necessary to pass the sanity check in opr_sanity,
 * which checks that all built-in functions that share the implementing C
 * function take the same number of arguments.
 */
Datum
pg_ls_dir_1arg(PG_FUNCTION_ARGS)
{
    return pg_ls_dir(fcinfo);
}

/* Generic function to return a directory listing of files */
static Datum
pg_ls_dir_files(FunctionCallInfo fcinfo, char *dir)
{
    FuncCallContext *funcctx;
    struct dirent *de;
    directory_fctx *fctx;

    if (SRF_IS_FIRSTCALL())
    {
        MemoryContext oldcontext;
        TupleDesc    tupdesc;

        funcctx = SRF_FIRSTCALL_INIT();
        oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

        fctx = palloc(sizeof(directory_fctx));

        tupdesc = CreateTemplateTupleDesc(3, false);
        TupleDescInitEntry(tupdesc, (AttrNumber) 1, "name",
                           TEXTOID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber) 2, "size",
                           INT8OID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber) 3, "modification",
                           TIMESTAMPTZOID, -1, 0);
        funcctx->tuple_desc = BlessTupleDesc(tupdesc);

        fctx->location = pstrdup(dir);
        fctx->dirdesc = AllocateDir(fctx->location);

        if (!fctx->dirdesc)
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not read directory \"%s\": %m",
                            fctx->location)));

        funcctx->user_fctx = fctx;
        MemoryContextSwitchTo(oldcontext);
    }

    funcctx = SRF_PERCALL_SETUP();
    fctx = (directory_fctx *) funcctx->user_fctx;

    while ((de = ReadDir(fctx->dirdesc, fctx->location)) != NULL)
    {
        Datum        values[3];
        bool        nulls[3];
        char        path[MAXPGPATH * 2];
        struct stat attrib;
        HeapTuple    tuple;

        /* Skip hidden files */
        if (de->d_name[0] == '.')
            continue;

        /* Get the file info */
        snprintf(path, sizeof(path), "%s/%s", fctx->location, de->d_name);
        if (stat(path, &attrib) < 0)
            ereport(ERROR,
                    (errcode_for_file_access(),
                     errmsg("could not stat directory \"%s\": %m", dir)));

        /* Ignore anything but regular files */
        if (!S_ISREG(attrib.st_mode))
            continue;

        values[0] = CStringGetTextDatum(de->d_name);
        values[1] = Int64GetDatum((int64) attrib.st_size);
        values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
        memset(nulls, 0, sizeof(nulls));

        tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
        SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
    }

    FreeDir(fctx->dirdesc);
    SRF_RETURN_DONE(funcctx);
}

/* Function to return the list of files in the log directory */
Datum
pg_ls_logdir(PG_FUNCTION_ARGS)
{
    return pg_ls_dir_files(fcinfo, Log_directory);
}

/* Function to return the list of files in the WAL directory */
Datum
pg_ls_waldir(PG_FUNCTION_ARGS)
{
    return pg_ls_dir_files(fcinfo, XLOGDIR);
}
